//===----------------------------------------------------------*- swift -*-===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2015 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See http://swift.org/LICENSE.txt for license information
// See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
// simd.h overlays for Swift
//===----------------------------------------------------------------------===//

import Swift
import Darwin
@_exported import simd

% component = ['x','y','z','w'] # element property names, indexed by lane position
% scalar_types = ['Float','Double','Int32'] # Swift element types that get vector types generated
% ctype = { 'Float':'float', 'Double':'double', 'Int32':'int' } # Swift element type -> prefix of the generated vector/matrix type name
% llvm_type = { 'Float':'FPIEEE32', 'Double':'FPIEEE64', 'Int32':'Int32' } # Swift element type -> element part of the Builtin vector type name
% floating_types = ['Float','Double'] # element types that get the floating-point-only API and matrix types
% cardinal = { 2:'two', 3:'three', 4:'four'} # lane count spelled out for doc comments and messages
% hash_scales = ['1', '3', '5', '11'] # NOTE(review): not referenced anywhere in the visible template -- confirm before removing
% one_minus_ulp = { 'Float':'0x1.fffffep-1', 'Double':'0x1.fffffffffffffp-1' } # upper clamp used by `fract` so its result stays below 1

% for type in scalar_types:
%   for size in [2, 3, 4]:
//    Workaround <rdar://problem/18900352>
%     vectype = ctype[type] + str(size) # name of the generated Swift struct, e.g. float2
%     llvm_vectype = "Vec" + str(size) + "x" + llvm_type[type] # name of the Builtin vector type used as storage
%     vecsize = (8 if type == 'Double' else 4)*(2 if size == 2 else 4) # value passed to @_alignment: 8 or 4 per element, times 2 lanes for size 2 and 4 lanes for sizes 3 and 4
%     extractelement = "extractelement_" + llvm_vectype + "_Int32" # Builtin used to read one lane
%     insertelement = "insertelement_" + llvm_vectype + "_" + llvm_type[type] + "_Int32" # Builtin used to write one lane
%     is_floating = type in floating_types # selects float vs. integer builtins and the float-only API
%     wrap = "" if is_floating else "&" # integer operators are spelled &+, &-, &*


/// A vector of ${cardinal[size]} `${type}`.  This corresponds to the C and
/// Obj-C type `vector_${vectype}` and the C++ type `simd::${vectype}`.
@_alignment(${vecsize})
public struct ${vectype} :
    ArrayLiteralConvertible, CustomDebugStringConvertible {

  /// Raw builtin vector value holding the elements.
  public var _vector: Builtin.${llvm_vectype}

% for i in range(size):
  /// Element ${i} of the vector.
  public var ${component[i]} : ${type} {
    @_transparent
    get {
      let elt = Builtin.${extractelement}(_vector,
        (${i} as Int32)._value)

      return ${type}(_bits: elt)
    }
    @_transparent
    set {
      _vector = Builtin.${insertelement}(_vector,
        newValue._value,
        (${i} as Int32)._value)
    }
  }
% end

  /// Initialize to the zero vector.
  @_transparent
  public init() { self.init(0) }

  /// Initialize from a raw builtin vector value.
  @_transparent
  public init(_bits: Builtin.${llvm_vectype}) {
    _vector = _bits
  }

  /// Initialize a vector with the specified elements.
  @_transparent
  public init(${', '.join(map(lambda c: '_ ' + c + ': ' + type, component[:size]))}) {
    // Start from an all-zero vector and insert the elements one at a time.
    var v: Builtin.${llvm_vectype} = Builtin.zeroInitializer()
% for i in range(size):
    v = Builtin.${insertelement}(v,
      ${component[i]}._value,
      (${i} as Int32)._value)
% end
    _vector = v
  }

  /// Initialize a vector with the specified elements.
  @_transparent
  public init(${', '.join(map(lambda c: c + ': ' + type, component[:size]))}) {
    self.init(${', '.join(c for c in component[:size])})
  }

  /// Initialize to a vector with all elements equal to `scalar`.
  @_transparent
  public init(_ scalar: ${type}) {
    self.init(${', '.join(['scalar']*size)})
  }

  /// Initialize to a vector with elements taken from `array`.
  ///
  /// - Precondition: `array` must have exactly ${cardinal[size]} elements.
  public init(_ array: [${type}]) {
    _precondition(array.count == ${size},
                  "${vectype} requires a ${cardinal[size]}-element array")
    self.init(${', '.join(map(lambda i:
              'array[' + str(i) + ']',
              range(size)))})
  }

  /// Initialize using `arrayLiteral`.
  ///
  /// - Precondition: the array literal must have exactly ${cardinal[size]}
  ///   elements.
  public init(arrayLiteral elements: ${type}...) { self.init(elements) }

  /// Access individual elements of the vector via subscript.
  ///
  /// - Precondition: `index` must be in the range `0..<${size}`.
  public subscript(index: Int) -> ${type} {
    @_transparent
    get {
      _precondition(index >= 0, "vector index out of range")
      _precondition(index < ${size}, "vector index out of range")
      let elt = Builtin.${extractelement}(_vector,
        Int32(index)._value)
      return ${type}(_bits: elt)
    }
    @_transparent
    set(value) {
      _precondition(index >= 0, "vector index out of range")
      _precondition(index < ${size}, "vector index out of range")
      _vector = Builtin.${insertelement}(_vector,
        value._value,
        Int32(index)._value)
    }
  }

  /// Debug string representation
  public var debugDescription: String {
    return "${vectype}(${', '.join(map(lambda c:
                       '\\(' + c + ')',
                       component[:size]))})"
  }

  /// Helper for matrix debug representations
  internal var _descriptionAsArray: String {
    get {
      return "[${', '.join(map(lambda c:
               '\\(' + c + ')',
               component[:size]))}]"
    }
  }
}

/// Elementwise sum of `lhs` and `rhs`.  For integer element types the
/// operator is spelled `&+`.
@inline(__always)
@warn_unused_result
public func ${wrap}+(lhs: ${vectype}, rhs: ${vectype}) -> ${vectype} {
  let sum = Builtin.${'fadd' if is_floating else 'add'}_${llvm_vectype}(
    lhs._vector, rhs._vector)
  return ${vectype}(_bits: sum)
}

/// Elementwise difference `lhs - rhs`.  For integer element types the
/// operator is spelled `&-`.
@inline(__always)
@warn_unused_result
public func ${wrap}-(lhs: ${vectype}, rhs: ${vectype}) -> ${vectype} {
  let difference = Builtin.${'fsub' if is_floating else 'sub'}_${llvm_vectype}(
    lhs._vector, rhs._vector)
  return ${vectype}(_bits: difference)
}

/// Negation of `rhs`, computed by subtracting it from the zero vector.
@inline(__always)
@warn_unused_result
public prefix func -(rhs: ${vectype}) -> ${vectype} {
  let zero = ${vectype}(0)
  return zero ${wrap}- rhs
}

/// Elementwise product of `lhs` and `rhs` (also known as the Hadamard or
/// Schur product).  For integer element types the operator is spelled `&*`.
@inline(__always)
@warn_unused_result
public func ${wrap}*(lhs: ${vectype}, rhs: ${vectype}) -> ${vectype} {
  let product = Builtin.${'fmul' if is_floating else 'mul'}_${llvm_vectype}(
    lhs._vector, rhs._vector)
  return ${vectype}(_bits: product)
}

/// Elementwise quotient `lhs / rhs`, the inverse of the elementwise product.
/// Integer element types use signed division.
@inline(__always)
@warn_unused_result
public func /(lhs: ${vectype}, rhs: ${vectype}) -> ${vectype} {
  let quotient = Builtin.${'fdiv' if is_floating else 'sdiv'}_${llvm_vectype}(
    lhs._vector, rhs._vector)
  return ${vectype}(_bits: quotient)
}

% if is_floating:
/// Add `rhs` to `lhs`.
@inline(__always)
public func +=(inout lhs: ${vectype}, rhs: ${vectype}) -> Void {
  lhs = lhs + rhs
}

/// Subtract `rhs` from `lhs`.
@inline(__always)
public func -=(inout lhs: ${vectype}, rhs: ${vectype}) -> Void {
  lhs = lhs - rhs
}

/// Multiply `lhs` by `rhs` (elementwise).
@inline(__always)
public func *=(inout lhs: ${vectype}, rhs: ${vectype}) -> Void {
  lhs = lhs * rhs
}
% end

/// Divide `lhs` by `rhs` (elementwise).
@inline(__always)
public func /=(inout lhs: ${vectype}, rhs: ${vectype}) -> Void {
  lhs = lhs / rhs
}

/// Scalar-Vector product: every element of `rhs` multiplied by `lhs`.
@inline(__always)
@warn_unused_result
public func ${wrap}*(lhs: ${type}, rhs: ${vectype}) -> ${vectype} {
  let splat = ${vectype}(lhs)
  return splat ${wrap}* rhs
}

/// Vector-Scalar product: every element of `lhs` multiplied by `rhs`.
@inline(__always)
@warn_unused_result
public func ${wrap}*(lhs: ${vectype}, rhs: ${type}) -> ${vectype} {
  let splat = ${vectype}(rhs)
  return lhs ${wrap}* splat
}

% if is_floating:
/// Multiplies every element of `lhs` by the scalar `rhs`, in place.
@inline(__always)
public func *=(inout lhs: ${vectype}, rhs: ${type}) -> Void {
  let scaled: ${vectype} = lhs * rhs
  lhs = scaled
}
% end

/// Elementwise absolute value.  Each element of the result is `abs` applied
/// to the corresponding element of `x`.
@inline(__always)
@warn_unused_result
public func abs(x: ${vectype}) -> ${vectype} {
  return ${vectype}(${', '.join('abs(x.' + c + ')' for c in component[:size])})
}

/// Elementwise minimum.  Each element of the result is the smaller of the
/// corresponding elements of `x` and `y`.
@inline(__always)
@warn_unused_result
public func min(x: ${vectype}, _ y: ${vectype}) -> ${vectype} {
  return ${vectype}(${', '.join('min(x.' + c + ',y.' + c + ')' for c in component[:size])})
}

/// Elementwise maximum.  Each element of the result is the larger of the
/// corresponding elements of `x` and `y`.
@inline(__always)
@warn_unused_result
public func max(x: ${vectype}, _ y: ${vectype}) -> ${vectype} {
  return ${vectype}(${', '.join('max(x.' + c + ',y.' + c + ')' for c in component[:size])})
}

/// Vector-scalar minimum.  `scalar` is broadcast to every element and the
/// elementwise minimum with `vector` is returned.
@inline(__always)
@warn_unused_result
public func min(vector: ${vectype}, _ scalar: ${type}) -> ${vectype} {
  let splat = ${vectype}(scalar)
  return min(vector, splat)
}

/// Vector-scalar maximum.  `scalar` is broadcast to every element and the
/// elementwise maximum with `vector` is returned.
@inline(__always)
@warn_unused_result
public func max(vector: ${vectype}, _ scalar: ${type}) -> ${vectype} {
  let splat = ${vectype}(scalar)
  return max(vector, splat)
}

/// Clamps each element of `x` to the range formed by the corresponding
/// elements of `min` and `max`: the elementwise maximum with `min` is taken
/// first, then the elementwise minimum with `max`.  Any lanes of `x` that
/// contain NaN will end up with the `min` value.
@inline(__always)
@warn_unused_result
public func clamp(x: ${vectype},
                  min: ${vectype},
                  max: ${vectype})
                    -> ${vectype} {
  // The parameters shadow the free functions, hence the `simd.` qualification.
  let boundedBelow: ${vectype} = simd.max(x, min)
  return simd.min(boundedBelow, max)
}

/// Clamps each element of `x` to the range [`min`, `max`], using the same
/// scalar bounds for every element.  If any lane of `x` is NaN, the
/// corresponding lane of the result is `min`.
@inline(__always)
@warn_unused_result
public func clamp(x: ${vectype},
                  min: ${type},
                  max: ${type})
                    -> ${vectype} {
  // The parameters shadow the free functions, hence the `simd.` qualification.
  let boundedBelow: ${vectype} = simd.max(x, min)
  return simd.min(boundedBelow, max)
}

/// Sum of all elements of `x`, added in element order.  Integer vectors use
/// the wrapping `&+` operator.
@inline(__always)
@warn_unused_result
public func reduce_add(x: ${vectype}) -> ${type} {
  return ${(' ' + wrap + '+ ').join('x.' + c for c in component[:size])}
}

/// Smallest element of `x`.
@inline(__always)
@warn_unused_result
public func reduce_min(x: ${vectype}) -> ${type} {
  return min(${', '.join('x.' + c for c in component[:size])})
}

/// Largest element of `x`.
@inline(__always)
@warn_unused_result
public func reduce_max(x: ${vectype}) -> ${type} {
  return max(${', '.join('x.' + c for c in component[:size])})
}

%     if is_floating:

/// Elementwise sign.  Each lane of the result is the scalar `sign` of the
/// corresponding lane of `x`: -1 if it is less than zero, +1 if it is greater
/// than zero, and 0 otherwise.
@inline(__always)
@warn_unused_result
public func sign(x: ${vectype}) -> ${vectype} {
  return ${vectype}(${', '.join('sign(x.' + c + ')' for c in component[:size])})
}

/// Elementwise linear interpolation `x + t*(y - x)`, which gives `x` at
/// `t=0` and `y` at `t=1`.  `t` is not restricted to [0, 1].
@inline(__always)
@warn_unused_result
public func mix(x: ${vectype}, _ y: ${vectype}, t: ${vectype}) -> ${vectype} {
  let delta: ${vectype} = y - x
  return x + t*delta
}

/// Linear interpolation `x + t*(y - x)` with a single scalar parameter, which
/// gives `x` at `t=0` and `y` at `t=1`.  `t` is not restricted to [0, 1].
@inline(__always)
@warn_unused_result
public func mix(x: ${vectype}, _ y: ${vectype}, t: ${type}) -> ${vectype} {
  let delta: ${vectype} = y - x
  return x + t*delta
}

/// Elementwise reciprocal: the scalar `recip` applied to each lane of `x`.
@inline(__always)
@warn_unused_result
public func recip(x: ${vectype}) -> ${vectype} {
  return ${vectype}(${', '.join('recip(x.' + c + ')' for c in component[:size])})
}

/// Elementwise reciprocal square root: the scalar `rsqrt` applied to each
/// lane of `x`.
@inline(__always)
@warn_unused_result
public func rsqrt(x: ${vectype}) -> ${vectype} {
  return ${vectype}(${', '.join('rsqrt(x.' + c + ')' for c in component[:size])})
}

/// Floating-point spelling of `min`; forwards both vectors to `min`.
@inline(__always)
@warn_unused_result
public func fmin(x: ${vectype}, _ y: ${vectype}) -> ${vectype} {
  let smaller: ${vectype} = min(x, y)
  return smaller
}

/// Floating-point spelling of `max`; forwards both vectors to `max`.
@inline(__always)
@warn_unused_result
public func fmax(x: ${vectype}, _ y: ${vectype}) -> ${vectype} {
  let larger: ${vectype} = max(x, y)
  return larger
}

/// Elementwise ceiling: each lane of the result is the smallest integral
/// value greater than or equal to the corresponding lane of `x`.
@inline(__always)
@warn_unused_result
public func ceil(x: ${vectype}) -> ${vectype} {
  return ${vectype}(${', '.join('ceil(x.' + c + ')' for c in component[:size])})
}

/// Elementwise floor: each lane of the result is the largest integral value
/// less than or equal to the corresponding lane of `x`.
@inline(__always)
@warn_unused_result
public func floor(x: ${vectype}) -> ${vectype} {
  return ${vectype}(${', '.join('floor(x.' + c + ')' for c in component[:size])})
}

/// Elementwise truncation: each lane of the result is the closest integral
/// value whose magnitude does not exceed that of the corresponding lane of
/// `x`.
@inline(__always)
@warn_unused_result
public func trunc(x: ${vectype}) -> ${vectype} {
  return ${vectype}(${', '.join('trunc(x.' + c + ')' for c in component[:size])})
}

/// Fractional part `x - floor(x)`, clamped from above so that it lies in
/// [0,1).  Without the clamp a very small negative `x` would produce 1.0,
/// which may result in out-of-bounds table accesses in common usage.
@inline(__always)
@warn_unused_result
public func fract(x: ${vectype}) -> ${vectype} {
  let upperBound = ${vectype}(${one_minus_ulp[type]})
  return fmin(x - floor(x), upperBound)
}

/// Elementwise step function: each lane is 0.0 where `x < edge` and 1.0
/// otherwise, as computed by the scalar `step`.
@inline(__always)
@warn_unused_result
public func step(x: ${vectype}, edge: ${vectype}) -> ${vectype} {
  return ${vectype}(${', '.join('step(x.' + c + ', edge: edge.' + c + ')' for c in component[:size])})
}

/// Smooth transition from 0 to 1: 0.0 if `x < edge0`, 1.0 if `x > edge1`, and
/// the cubic `t*t*(3 - 2*t)` in between, where `t` is the position of `x`
/// within [edge0, edge1] clamped to [0, 1].
@inline(__always)
@warn_unused_result
public func smoothstep(x: ${vectype},
                              edge0: ${vectype},
                              edge1: ${vectype})
                                -> ${vectype} {
  let position: ${vectype} = (x-edge0)/(edge1-edge0)
  let t = clamp(position, min: 0, max: 1)
  return t*t*(${vectype}(3) - 2*t)
}

/// Dot product of `x` and `y`: the sum of their elementwise products.
@inline(__always)
@warn_unused_result
public func dot(x: ${vectype}, _ y: ${vectype}) -> ${type} {
  let products: ${vectype} = x*y
  return reduce_add(products)
}

/// Projection of `x` onto `y`: `y` scaled by `dot(x,y)/dot(y,y)`.
@inline(__always)
@warn_unused_result
public func project(x: ${vectype}, _ y: ${vectype}) -> ${vectype} {
  let scale: ${type} = dot(x,y)/dot(y,y)
  return scale*y
}

/// Squared length of `x`, i.e. `dot(x,x)`.  Prefer this over `length` when
/// lengths only need to be compared with each other.  Rather than:
///
///   if (length(x) < length(y)) { ... }
///
/// write:
///
///   if (length_squared(x) < length_squared(y)) { ... }
///
/// which avoids one or two square roots, a fairly costly operation.
@inline(__always)
@warn_unused_result
public func length_squared(x: ${vectype}) -> ${type} {
  let squared: ${type} = dot(x,x)
  return squared
}

/// Length of `x` (the two-norm or "Euclidean norm"): the square root of
/// `length_squared(x)`.
@inline(__always)
@warn_unused_result
public func length(x: ${vectype}) -> ${type} {
  let squared: ${type} = length_squared(x)
  return sqrt(squared)
}

/// The one-norm (or "taxicab norm") of `x`: the sum of the absolute values of
/// its elements.
@inline(__always)
@warn_unused_result
public func norm_one(x: ${vectype}) -> ${type} {
  let magnitudes: ${vectype} = abs(x)
  return reduce_add(magnitudes)
}

/// The infinity-norm (or "sup norm") of `x`: the largest absolute value among
/// its elements.
@inline(__always)
@warn_unused_result
public func norm_inf(x: ${vectype}) -> ${type} {
  let magnitudes: ${vectype} = abs(x)
  return reduce_max(magnitudes)
}

/// Squared distance between `x` and `y`: the squared length of `x - y`.
@inline(__always)
@warn_unused_result
public func distance_squared(x: ${vectype}, _ y: ${vectype}) -> ${type} {
  let offset: ${vectype} = x - y
  return length_squared(offset)
}

/// Distance between `x` and `y`: the length of `x - y`.
@inline(__always)
@warn_unused_result
public func distance(x: ${vectype}, _ y: ${vectype}) -> ${type} {
  let offset: ${vectype} = x - y
  return length(offset)
}

/// Unit vector pointing in the same direction as `x`, computed by scaling
/// `x` with the reciprocal square root of its squared length.
@inline(__always)
@warn_unused_result
public func normalize(x: ${vectype}) -> ${vectype} {
  let inverseLength: ${type} = rsqrt(length_squared(x))
  return x * inverseLength
}

/// Reflection of `x` through the hyperplane that passes through the origin
/// and has unit normal vector `n`, computed as `x - 2*dot(x,n)*n`.  E.g. if
/// `x` is [1,2,3] and `n` is [0,0,1], the result is [1,2,-3].
@inline(__always)
@warn_unused_result
public func reflect(x: ${vectype}, n: ${vectype}) -> ${vectype} {
  let scale: ${type} = 2*dot(x,n)
  return x - scale*n
}

/// Refraction direction for unit incident vector `x`, unit surface normal
/// `n`, and index of refraction `eta`.  When the angle between the incident
/// vector and the surface is so small that total internal reflection occurs
/// (the discriminant `k` is not `>= 0`), the zero vector is returned.
@inline(__always)
@warn_unused_result
public func refract(x: ${vectype},
                           n: ${vectype},
                           eta: ${type})
                             -> ${vectype} {
  let xDotN: ${type} = dot(x,n)
  let k = 1 - eta*eta*(1 - xDotN*xDotN)
  if k >= 0 {
    let scale: ${type} = eta*xDotN + sqrt(k)
    return eta*x - scale*n
  }
  return ${vectype}(0)
}

%     end # if is_floating
%   end # for size in [2, 3, 4]
%   if is_floating:
//  Scalar versions of common operations:

/// Sign of `x`: -1 if `x < 0`, +1 if `x > 0`, and 0 otherwise (so
/// `sign(NaN)` is 0).
@inline(__always)
@warn_unused_result
public func sign(x: ${type}) -> ${type} {
  if x < 0 { return -1 }
  if x > 0 { return 1 }
  return 0
}

/// Reciprocal of `x`, i.e. `1/x`.
@inline(__always)
@warn_unused_result
public func recip(x: ${type}) -> ${type} {
  return 1/x
}

/// Reciprocal square root of `x`, i.e. `1/sqrt(x)`.
@inline(__always)
@warn_unused_result
public func rsqrt(x: ${type}) -> ${type} {
  let root: ${type} = sqrt(x)
  return 1/root
}

/// Step function: 0.0 if `x < edge`, and 1.0 otherwise.
@inline(__always)
@warn_unused_result
public func step(x: ${type}, edge: ${type}) -> ${type} {
  if x < edge { return 0.0 }
  return 1.0
}

/// Cross product of two two-dimensional vectors, treating them as
/// three-dimensional vectors in the xy-plane.  The result lies along the
/// z-axis, so its `x` and `y` elements are zero.
@inline(__always)
@warn_unused_result
public func cross(x: ${ctype[type]}2,
                       _ y: ${ctype[type]}2)
                         -> ${ctype[type]}3 {
  let z: ${type} = x.x*y.y - x.y*y.x
  return ${ctype[type]}3(0, 0, z)
}

/// Cross product of two three-dimensional vectors.  The result is
/// perpendicular to the plane determined by `x` and `y`, and its length equals
/// the oriented area of the parallelogram they determine.
@inline(__always)
@warn_unused_result
public func cross(x: ${ctype[type]}3,
                       _ y: ${ctype[type]}3)
                         -> ${ctype[type]}3 {
  let cx: ${type} = x.y*y.z - x.z*y.y
  let cy: ${type} = x.z*y.x - x.x*y.z
  let cz: ${type} = x.x*y.y - x.y*y.x
  return ${ctype[type]}3(cx, cy, cz)
}

%   else: # !is_floating

// Integer vector types only support wrapping arithmetic. Make the non-wrapping
// operators unavailable so that fixits guide users to the unchecked operations.

%     for size in [2, 3, 4]:
%       vectype = ctype[type] + str(size) # re-derived here: the size loop above has already ended, so vectype would otherwise only name the last width
/// Unavailable checked addition; use `&+`.
@available(*, unavailable, renamed="&+",
           message="integer vector types do not support checked arithmetic; use the wrapping operations instead")
public func +(x: ${vectype}, y: ${vectype}) -> ${vectype} {
  fatalError("unavailable function cannot be called")
}

/// Unavailable checked subtraction; use `&-`.
@available(*, unavailable, renamed="&-",
           message="integer vector types do not support checked arithmetic; use the wrapping operations instead")
public func -(x: ${vectype}, y: ${vectype}) -> ${vectype} {
  fatalError("unavailable function cannot be called")
}

/// Unavailable checked elementwise multiplication; use `&*`.
@available(*, unavailable, renamed="&*",
           message="integer vector types do not support checked arithmetic; use the wrapping operations instead")
public func *(x: ${vectype}, y: ${vectype}) -> ${vectype} {
  fatalError("unavailable function cannot be called")
}

/// Unavailable checked vector-scalar multiplication; use `&*`.
@available(*, unavailable, renamed="&*",
           message="integer vector types do not support checked arithmetic; use the wrapping operations instead")
public func *(x: ${vectype}, y: ${type}) -> ${vectype} {
  fatalError("unavailable function cannot be called")
}

/// Unavailable checked scalar-vector multiplication; use `&*`.
@available(*, unavailable, renamed="&*",
           message="integer vector types do not support checked arithmetic; use the wrapping operations instead")
public func *(x: ${type}, y: ${vectype}) -> ${vectype} {
  fatalError("unavailable function cannot be called")
}

/// Unavailable checked compound addition; write `x = x &+ y`.
@available(*, unavailable,
           message="integer vector types do not support checked arithmetic; use the wrapping operation 'x = x &+ y' instead")
public func +=(inout x: ${vectype}, y: ${vectype}) {
  fatalError("unavailable function cannot be called")
}

/// Unavailable checked compound subtraction; write `x = x &- y`.
@available(*, unavailable,
           message="integer vector types do not support checked arithmetic; use the wrapping operation 'x = x &- y' instead")
public func -=(inout x: ${vectype}, y: ${vectype}) {
  fatalError("unavailable function cannot be called")
}

/// Unavailable checked compound multiplication; write `x = x &* y`.
@available(*, unavailable,
           message="integer vector types do not support checked arithmetic; use the wrapping operation 'x = x &* y' instead")
public func *=(inout x: ${vectype}, y: ${vectype}) {
  fatalError("unavailable function cannot be called")
}

/// Unavailable checked compound scaling; write `x = x &* y`.
@available(*, unavailable,
           message="integer vector types do not support checked arithmetic; use the wrapping operation 'x = x &* y' instead")
public func *=(inout x: ${vectype}, y: ${type}) {
  fatalError("unavailable function cannot be called")
}

%     end # for size in [2, 3, 4]

%   end # is_floating
% end # for type in scalar_types

% for type in floating_types:
%   for rows in [2,3,4]:
//      Workaround <rdar://problem/18900352>
%     for cols in [2,3,4]:
//      Workaround <rdar://problem/18900352>
%       mattype = ctype[type] + str(cols) + 'x' + str(rows) # generated matrix struct name: columns first, then rows
%       diagsize = rows if rows < cols else cols # length of the main diagonal (the smaller dimension)
%       coltype = ctype[type] + str(rows) # vector type of one column (one element per row)
%       rowtype = ctype[type] + str(cols) # vector type of one row (one element per column)
%       diagtype = ctype[type] + str(diagsize) # vector type holding the main diagonal
%       transtype = ctype[type] + str(rows) + 'x' + str(cols) # matrix type with the two dimensions swapped
%       cmatrix = 'matrix_' + mattype # name of the C matrix type this struct is bit-cast to and from

/// A matrix of `${type}` with ${cols} columns and ${rows} rows, stored as
/// ${cols} column vectors of type `${coltype}`.
public struct ${mattype} : CustomDebugStringConvertible {

  /// Column vectors of the matrix, in column order.
  internal var _columns: (${', '.join([coltype]*cols)})

  /// Initialize matrix to zero.
  public init() {
%   for i in range(cols):
    _columns.${i} = ${coltype}()
%   end
  }

  /// Initialize matrix to have `scalar` on main diagonal, zeros elsewhere.
  public init(_ scalar: ${type}) {
    self.init(diagonal: ${diagtype}(scalar))
  }

  /// Initialize matrix to have specified `diagonal`, and zeros elsewhere.
  public init(diagonal: ${diagtype}) {
    // Zero every column first, then overwrite the diagonal entries.
%   for i in range(cols):
    self._columns.${i} = ${coltype}()
%   end
%   for i in range(diagsize):
    self._columns.${i}.${component[i]} = diagonal.${component[i]}
%   end
  }

  /// Initialize matrix to have specified `columns`.
  ///
  /// - Precondition: `columns` must contain exactly ${cols} vectors.
  public init(_ columns: [${coltype}]) {
    _precondition(columns.count == ${cols}, "Requires array of ${cols} vectors")
%   for i in range(cols):
    self._columns.${i} = columns[${i}]
%   end
  }

  /// Initialize matrix to have specified `rows`.
  ///
  /// - Precondition: `rows` must contain exactly ${rows} vectors.
  public init(rows: [${rowtype}]) {
    _precondition(rows.count == ${rows}, "Requires array of ${rows} vectors")
    // Column i gathers element i from every row.
%   for i in range(cols):
    self._columns.${i} = [${', '.join(map(lambda j:
                          'rows[' + str(j) + '].' + component[i],
                          range(rows)))}]
%   end
  }

  /// Initialize matrix to have specified `columns`.
  internal init(${', '.join(map(lambda i:
                '_ col' + str(i) + ': ' + coltype,
                range(cols)))}) {
%   for i in range(cols):
    self._columns.${i} = col${i}
%   end
  }

  /// Initialize matrix from corresponding C matrix type.
  public init(_ cmatrix: ${cmatrix}) {
    self = unsafeBitCast(cmatrix, ${mattype}.self)
  }

  /// Get the matrix as the corresponding C matrix type.
  public var cmatrix: ${cmatrix} {
    get { return unsafeBitCast(self, ${cmatrix}.self) }
  }

  /// Access to individual columns.
  ///
  /// - Precondition: `column` must be in the range `0..<${cols}`.
  public subscript(column: Int) -> ${coltype} {
    get {
      switch column {
% for i in range(cols):
      case ${i}: return _columns.${i}
% end
      default: _preconditionFailure("Column index out of range")
      }
    }
    set (value) {
      switch column {
% for i in range(cols):
      case ${i}: _columns.${i} = value
% end
      default: _preconditionFailure("Column index out of range")
      }
    }
  }

  /// Access to individual elements.
  public subscript(column: Int, row: Int) -> ${type} {
    get { return self[column][row] }
    set (value) { self[column][row] = value }
  }

  /// Debug string representation: the type name followed by the columns,
  /// each printed as an array of its elements.
  public var debugDescription: String {
    return "${mattype}([${', '.join(map(lambda i:
                        '\\(_columns.' + str(i) + '._descriptionAsArray)',
                        range(cols)))}])"
  }

  /// Transpose of the matrix.
  public var transpose: ${transtype} {
    get {
      // Column i of the transpose is row i of this matrix.
      return ${transtype}([
% for i in range(rows):
        [${', '.join(map(lambda j: \
         'self[' + str(j) + ',' + str(i) + ']', \
         range(cols)))}],
% end # for i in range(rows)
      ])
    }
  }

% if rows == cols:
  /// Inverse of the matrix if it exists, otherwise the contents of the
  /// resulting matrix are undefined.
  public var inverse: ${mattype} {
    get {
      % inverse_func = '__invert_' + ('f' if type == 'Float' else 'd') + str(cols)
      return ${mattype}(${inverse_func}(self.cmatrix))
    }
  }
% end
}

/// Sum of two matrices, formed by adding corresponding columns.
@warn_unused_result
public func +(lhs: ${mattype}, rhs: ${mattype}) -> ${mattype} {
  return ${mattype}(${', '.join('lhs._columns.' + str(i) + ' + rhs._columns.' + str(i) for i in range(cols))})
}

/// Negation of a matrix, formed by negating every column.
@warn_unused_result
public prefix func -(rhs: ${mattype}) -> ${mattype} {
  return ${mattype}(${', '.join('-rhs._columns.' + str(i) for i in range(cols))})
}

/// Difference of two matrices, formed by subtracting corresponding columns.
@warn_unused_result
public func -(lhs: ${mattype}, rhs: ${mattype}) -> ${mattype} {
  return ${mattype}(${', '.join('lhs._columns.' + str(i) + ' - rhs._columns.' + str(i) for i in range(cols))})
}

/// Add the matrix `rhs` to `lhs`.
public func +=(inout lhs: ${mattype}, rhs: ${mattype}) -> Void {
  lhs = lhs + rhs
}

/// Subtract the matrix `rhs` from `lhs`.
public func -=(inout lhs: ${mattype}, rhs: ${mattype}) -> Void {
  lhs = lhs - rhs
}

/// Scalar-Matrix multiplication: every column of `rhs` scaled by `lhs`.
@warn_unused_result
public func *(lhs: ${type}, rhs: ${mattype}) -> ${mattype} {
  return ${mattype}(${', '.join('lhs*rhs._columns.' + str(i) for i in range(cols))})
}

/// Matrix-Scalar multiplication; forwards to the scalar-matrix product.
@warn_unused_result
public func *(lhs: ${mattype}, rhs: ${type}) -> ${mattype} {
  let scaled: ${mattype} = rhs*lhs
  return scaled
}

/// Scale the matrix `lhs` by the scalar `rhs`, in place.
public func *=(inout lhs: ${mattype}, rhs: ${type}) -> Void {
  let scaled: ${mattype} = lhs*rhs
  lhs = scaled
}

/// Matrix-Vector multiplication.  Keep in mind that matrix types are named
/// `${ctype[type]}NxM` where `N` is the number of *columns* and `M` is the
/// number of *rows*, so we multiply a `${ctype[type]}3x2 * ${ctype[type]}3`
/// to get a `${ctype[type]}2`, for example.
///
/// The result is the sum of the columns of `lhs`, each scaled by the
/// corresponding element of `rhs`.
@warn_unused_result
public func *(lhs: ${mattype}, rhs: ${rowtype}) -> ${coltype} {
  return ${' + '.join(map(lambda i: \
         'lhs._columns.'+str(i)+'*rhs.'+component[i], \
         range(cols)))}
}

/// Vector-Matrix multiplication.  Each element of the result is the dot
/// product of `lhs` with the corresponding column of `rhs`.
@warn_unused_result
public func *(lhs: ${coltype}, rhs: ${mattype}) -> ${rowtype} {
  return ${rowtype}(${', '.join('dot(lhs, rhs._columns.' + str(i) + ')' for i in range(cols))})
}

% for k in [2,3,4]:
%   lhstype = ctype[type] + str(k) + 'x' + str(rows)
%   rhstype = ctype[type] + str(cols) + 'x' + str(k)
/// Matrix multiplication (the "usual" matrix product, not the elementwise
/// product).  Each column of the result is `lhs` multiplied by the
/// corresponding column of `rhs`.
@warn_unused_result
public func *(lhs: ${lhstype}, rhs: ${rhstype}) -> ${mattype} {
  return ${mattype}(${', '.join('lhs*rhs._columns.' + str(i) for i in range(cols))})
}

% end # for k in [2,3,4]

%   rhstype = ctype[type] + str(cols) + 'x' + str(cols)
/// In-place matrix multiplication (the "usual" matrix product, not the
/// elementwise product): replaces `lhs` with `lhs*rhs`.
public func *=(inout lhs: ${mattype}, rhs: ${rhstype}) -> Void {
  let product: ${mattype} = lhs*rhs
  lhs = product
}

% end # for cols in [2,3,4]
% end # for rows in [2,3,4]
% end # for type in floating_types
